### This script plots the analyses to compare different ways to incorporate the PDG
### into the Zonation analyses (Fig. 4)
### Libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
### Read data
## Read output data generated by the script spatial_analyses_zonationVSproxiesdivgen_all_ms.R
## this shows the area per taxa and proxies of genetic diversity within it
## considering the Zonation solution for 20% of the country considering only
## taxa distribution models (no habitat, etc).
sols_summary_spp<-read.csv("../data/comparations_output/sols_summary_spp.txt")
# check data
head(sols_summary_spp)
## Solution Richness shannon simpson Area Prop_to_AreaSP
## 1 ENM_sp 64 3.216956 0.9341559 473887 1.0000000
## 2 Scenario_SDM 58 2.908383 0.9179912 127904 0.2699040
## 3 Scenario_SDM_LZ 59 2.929987 0.9210733 127082 0.2681694
## 4 Scenario_SDM_PGD 61 3.565616 0.9677778 142830 0.3014010
## 5 Scenario_SDM_vs_PGD 64 3.718850 0.9720398 124221 0.2621321
## 6 Scenario_SDM_PGD_ADMU 63 3.182794 0.9418937 129665 0.2736201
## mean.prop median.prop Dist.to.Optimun sp
## 1 NA NA 0.0 Capsicum annuum glabriusculum
## 2 0.3786268 0.2580767 100555.5 Capsicum annuum glabriusculum
## 3 0.4083096 0.3092418 104980.3 Capsicum annuum glabriusculum
## 4 0.5480085 0.4495986 104108.5 Capsicum annuum glabriusculum
## 5 0.6525528 0.6421699 106906.7 Capsicum annuum glabriusculum
## 6 0.4407066 0.3329768 100121.0 Capsicum annuum glabriusculum
### Analyses and plots
## Check mean proportion of taxa area and mean proportion of proxies within it, per solution
group_by(sols_summary_spp, Solution) %>%
summarise(., mean.prop.sp.area = mean(Prop_to_AreaSP) , mean.prop.proxies = mean(mean.prop))
## # A tibble: 6 × 3
## Solution mean.prop.sp.area mean.prop.proxies
## <chr> <dbl> <dbl>
## 1 ENM_sp 1 NA
## 2 Scenario_SDM 0.442 0.514
## 3 Scenario_SDM_LZ 0.395 0.530
## 4 Scenario_SDM_PGD 0.338 0.558
## 5 Scenario_SDM_PGD_ADMU 0.448 0.641
## 6 Scenario_SDM_vs_PGD 0.386 0.746
### Plots
## Violin plots
# Simpson
filter(sols_summary_spp, Solution!="ENM_sp") %>%
ggplot(., aes(x=Solution, y=simpson, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
stat_summary(fun.y=mean, geom="point", color="red") +
theme(axis.text.x = element_blank()) +
scale_y_continuous(name="Simpson Index")
## Warning: `fun.y` is deprecated. Use `fun` instead.

# Area taxon conserved
filter(sols_summary_spp, Solution!="ENM_sp") %>%
ggplot(., aes(x=Solution, y=Prop_to_AreaSP, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
stat_summary(fun.y=mean, geom="point", color="red") +
theme(axis.text.x = element_blank()) +
scale_y_continuous(name="Proportion of distribution of the taxon conserved")
## Warning: `fun.y` is deprecated. Use `fun` instead.

# Mean prop area proxies by taxon conserved
plot_b <-filter(sols_summary_spp, Solution!="ENM_sp") %>%
ggplot(., aes(x=Solution, y=mean.prop, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
stat_summary(fun.y=mean, geom="point", color="red") +
theme(axis.text.x = element_blank()) +
scale_y_continuous(name="Mean proportion of the area of each proxy conserved by taxon")
## Warning: `fun.y` is deprecated. Use `fun` instead.
plot_b

# Median prop area proxies by taxon ponserved
filter(sols_summary_spp, Solution!="ENM_sp") %>%
ggplot(., aes(x=Solution, y=median.prop, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
stat_summary(fun.y=mean, geom="point", color="red") +
theme(axis.text.x = element_blank()) +
scale_y_continuous(name="Median proportion of the area of each proxy conserved by taxon")
## Warning: `fun.y` is deprecated. Use `fun` instead.

# Median and mean together
filter(sols_summary_spp, Solution!="ENM_sp") %>%
gather(., SummaryStat, value, -Solution, -c(Richness:Prop_to_AreaSP), -Dist.to.Optimun, -sp) %>%
ggplot(., aes(x=interaction(SummaryStat, Solution), y=value, color=SummaryStat)) + geom_violin() + geom_jitter(size = 0.1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Proportion of the distribution of the taxon conserved vs MEAN proportion of each proxy conserved by taxon
filter(sols_summary_spp, Solution!="ENM_sp") %>%
ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Solution)) + geom_point() +
scale_x_continuous(name="Proportion of distribution of the taxon conserved") +
scale_y_continuous(name="Mean proportion of the area of each proxy conserved by taxon")

# Proportion of the distribution of the taxon conserved vs MEADIAN proportion of each proxy conserved by taxon
filter(sols_summary_spp, Solution!="ENM_sp") %>%
ggplot(., aes(x=Prop_to_AreaSP, y=median.prop, color=Solution)) + geom_point() +
scale_x_continuous(name="Proportion of distribution of the taxon conserved") +
scale_y_continuous(name="Median proportion of the area of each proxy conserved by taxon")

## with regression lines
plot_a <-filter(sols_summary_spp, Solution!="ENM_sp") %>%
# plot points
ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Solution)) + geom_point(size=0.7) +
scale_x_continuous(name="Proportion of taxa distribution (%)") +
scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)",
breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
# plot fitted curve
geom_smooth(method=loess, aes(fill=Solution))
plot_a
## `geom_smooth()` using formula 'y ~ x'

### Plot for paper figure (Fig 4):
plot_a <- plot_a +
# nicer background
theme_bw()+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
# nicer legend (fill and color are needed becasue we are plotting both points smooth and points)
scale_fill_discrete(name = "Scenarios",
labels = c("SDM (n=116)",
"SDM+LZ (n=143)", "SDM+PGD (n=218)",
"SDM*PGD (n=5004)", "SDM and PGD as ADMU (n=117)")) +
scale_color_discrete(name = "Scenarios",
labels = c("SDM (n=116)",
"SDM+LZ (n=143)", "SDM+PGD (n=218)",
"SDM*PGD (n=5004)", "SDM and PGD as ADMU (n=117)")) +
theme(legend.position = c(.99, 0.01), legend.justification = c("right", "bottom")) +
# title
labs(title="a)") +
# larger text
theme(
plot.title = element_text(size=14, face="bold"),
axis.title = element_text(size=14),
axis.text = element_text(size=13),
legend.text = element_text(size=13),
legend.title = element_text(size=13, face="bold"))
plot_a
## `geom_smooth()` using formula 'y ~ x'

plot_b <- plot_b +
# nicer background
theme_bw()+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
# no legend
theme(legend.position = "none") +
# nicar x names
scale_x_discrete(name="Scenarios",
labels=c("SDM",
"SDM+LZ", "SDM+PGD",
"SDM*PGD", "as ADMU")) +
# title
labs(title="b)") +
# larger text
theme(
plot.title = element_text(size=14, face="bold"),
axis.title = element_text(size=14),
axis.text = element_text(size=13))
plot_b

## Plot Together
# Multiple plot function taken from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
multiplot(plot_a, plot_b, cols=2)
## `geom_smooth()` using formula 'y ~ x'

## Supplementary analysis suggested during review:
# Read IUCN category per taxa
iucn<-read.csv("../data/spatial/Zonation_final_solutions/IUCN_threat_category.csv")
# change names to fit sols_summary_spp names
iucn<-mutate(iucn, Nombre_=gsub("_", " ", iucn$Nombre_)) %>%
# keep only needed variables
select(.,-Taxa)
# add iucn data to Zonation solutions
sols_summary_spp<-left_join(sols_summary_spp, iucn, by =c("sp" = "Nombre_")) %>%
# Get Genus
separate(col=sp,
into=c("Genus", "Species"),
sep=" ", extra="merge", remove=FALSE)
# For the SDM*PGD curve of Fig 4a, get which taxa have less than 60% PGD represented
filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD", mean.prop<0.60) %>%
select(., mean.prop, Area, IUCN.threat.category, sp)
## mean.prop Area IUCN.threat.category sp
## 1 0.3748263 11810 DD Cucurbita cordata
## 2 0.4883239 32037 LC Cucurbita digitata
## 3 0.5813091 12183 LC Cucurbita lundelliana
## 4 0.5175327 13114 DD Cucurbita palmata
## 5 0.3760386 17106 VU Gossypium davidsonii
## 6 0.5506088 53446 NT Persea podadenia
## 7 0.5892198 30141 LC Phaseolus angustissimus
## 8 0.5311420 31513 LC Phaseolus filiformis
## 9 0.5789563 144453 LC Phaseolus lunatus silvester
## 10 0.5429136 87717 <NA> Physalis acolia
## 11 0.5492944 277396 LC Physalis angulata
## 12 0.5622299 46563 LC Physalis crassifolia
## 13 0.5918319 316674 LC Physalis hederifolia
## 14 0.5959233 328147 LC Physalis patula
## 15 0.5527966 305552 LC Physalis pruinosa
## 16 0.5600602 401532 LC Physalis pubescens
## 17 0.5835505 71380 LC Tripsacum lanceolatum
# For the SDM*PGD curve of Fig 4a, get which taxa have >90% PGD represented
filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD", mean.prop>0.90) %>%
select(., mean.prop, Area, IUCN.threat.category, sp)
## mean.prop Area IUCN.threat.category sp
## 1 0.9119520 21704 EN Capsicum lanceolatum
## 2 0.9348443 12008 VU Gossypium gossypioides
## 3 0.9530186 14472 EN Persea albida
## 4 0.9010968 8179 LC Persea caerulea
## 5 0.9115334 10910 VU Persea donnell smithii
## 6 0.9036658 27794 EN Persea pallescens
## 7 0.9131448 9487 LC Persea vesticula
## 8 0.9184310 22707 LC Phaseolus jaliscanus
## 9 1.0000000 341 LC Physalis ignota
## 10 0.9397357 4969 LC Solanum agrimonifolium
## 11 1.0000000 142 VU Solanum clarum
## 12 0.9384119 24517 LC Solanum hougasii
## 13 0.9522770 10185 NT Solanum trifidum
## 14 0.9728131 4788 EN Zea diploperennis
# Examine if there is a pattern on which have more or less PGD represented
# By genus
plot_genus <-filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD") %>%
# plot points
ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Genus)) + geom_point(size=1.1) +
scale_x_continuous(name="Proportion of taxa distribution (%)") +
scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)",
breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
theme_bw()
plot_genus

# by IUCN
iucn.cols<-c("#D81E05", "#FC7F3F", "#F9E814", "#CCE226", "#60C659", "#D1D1C6") # respectively for "CR", "EN", "VU", "NT", "LC", "DD"
plot_iucn <-filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD") %>%
# plot points
ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=IUCN.threat.category)) + geom_point(size=1.1) +
scale_x_continuous(name="Proportion of taxa distribution (%)") +
scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)",
breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
scale_color_manual(values= iucn.cols,
breaks= c("CR", "EN", "VU", "NT", "LC", "DD"),
name= "Threat category") +
theme_bw()+ labs(title="a)")
plot_iucn

# plot by area
plot_area <-filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD") %>%
# plot points
ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Area)) + geom_point(size=1.1) +
scale_x_continuous(name="Proportion of taxa distribution (%)") +
scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)",
breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
theme_bw() + labs(title="b)")
plot_area

multiplot(plot_iucn, plot_area, cols=1)

# Get session info
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyr_1.2.0 dplyr_1.0.9 ggplot2_3.3.6
##
## loaded via a namespace (and not attached):
## [1] bslib_0.3.1 compiler_4.2.1 pillar_1.7.0 jquerylib_0.1.4
## [5] highr_0.9 tools_4.2.1 digest_0.6.29 lattice_0.20-45
## [9] nlme_3.1-157 jsonlite_1.8.0 evaluate_0.15 lifecycle_1.0.1
## [13] tibble_3.1.7 gtable_0.3.0 mgcv_1.8-40 pkgconfig_2.0.3
## [17] rlang_1.0.3 Matrix_1.4-1 DBI_1.1.3 cli_3.3.0
## [21] rstudioapi_0.13 yaml_2.3.5 xfun_0.31 fastmap_1.1.0
## [25] withr_2.5.0 stringr_1.4.0 knitr_1.39 generics_0.1.3
## [29] sass_0.4.1 vctrs_0.4.1 tidyselect_1.1.2 glue_1.6.2
## [33] R6_2.5.1 fansi_1.0.3 rmarkdown_2.14 farver_2.1.1
## [37] purrr_0.3.4 magrittr_2.0.3 splines_4.2.1 scales_1.2.0
## [41] htmltools_0.5.2 ellipsis_0.3.2 assertthat_0.2.1 colorspace_2.0-3
## [45] labeling_0.4.2 utf8_1.2.2 stringi_1.7.6 munsell_0.5.0
## [49] crayon_1.5.1